
# Starting from two files:
# data-stations-NL-RR.txt (the data) and 
# info-stations-NL-RR.txt (the coordinates of monitoring stations)
# you will obtain 
# a matrix (alldata) of data in which the columns contains the daily data observed in one stations
# a matrix (coords) in which the rows contains the projected coordinates of each station

rm(list=ls())


country<-"NL" # country name
varname<-"RR" # variable RR = rainfall TX = max temperature

SY<-1999 # selected starting year 
EY<-2019 # selected ending year
sel.month<-c(10,11,12,1,2) # selected months of the year
PP<-0.9 ## we delete stations that contains more than (1-PP)100% of missing data

########## Reading the data 
stations<-read.csv(file=paste("info-stations-",country,"-",varname,".txt",sep = ""),header=TRUE)
row.names(stations)<-paste("S",stations$STAID,sep = "")
alldata<-read.csv(file=paste("data-stations-",country,"-",varname,".txt",sep = ""),header=TRUE)
names(alldata)[1]<-"day"

###### starting from SS  to xx+365*MM
sel.time<-(as.numeric(substring(as.Date(alldata$day),1,4))>=SY) &
  (as.numeric(substring(as.Date(alldata$day),1,4))<=EY)
alldata<-alldata[sel.time,]

# Further selection of stations according to their coordinates
# The coordinates below coorespond (approximately) to the 
# North Brabant region
sel1<-(stations$coord.dec.lat < 51.85)& (stations$coord.dec.lat > 51.25)
sel2<-(stations$coord.dec.lon < 5.48)& (stations$coord.dec.lon > 4.43)
alldata<-alldata[,c("day",row.names(stations)[sel1 & sel2])]
stations<-stations[row.names(stations)[sel1 & sel2],]


#### delete stations with more than  (1-PP)100% of NA
sel<-(as.logical(colSums(!is.na(alldata[,-1]))/nrow(alldata[,-1])>PP))
alldata<-alldata[,c("day",row.names(stations)[sel])]
stations<-stations[row.names(stations)[sel],]
dim(alldata)
nrow(which(is.na(alldata),arr.ind = TRUE))
vv<-unique(which(is.na(alldata),arr.ind = TRUE)[,2])
 


if (length(vv)>0){
# interpolation of the remaining data
for(i in 1:length(vv))
  alldata[,vv[i]]<-na_interpolation(alldata[,vv[i]])
}
library(xts)
days <- seq(from = as.Date(as.character(alldata$day[1]), format="%Y-%m-%d"),
            to = as.Date(as.character(alldata$day[nrow(alldata)]), format="%Y-%m-%d"),
            by = "day")
alldata.xts<-xts(alldata[,-1], order.by = days, frequency = 7)

sel<-(.indexmon(alldata.xts)+1) %in%sel.month
alldata.aw<-alldata.xts[sel,]
alldata<-as.matrix(alldata.aw)

coords<-cbind(stations$coord.dec.long,stations$coord.dec.lat)

# Montserrat Fuentes' program  (http://www4.stat.ncsu.edu/~fuentes/)
# to compute geodetic distance
rdistearth<-function(loc1, loc2, miles = FALSE ) {
  if (miles) 
    R <- 3963.34
  else R <- 6378.388	
  if(missing(loc2))
    loc2 <- loc1
  R <- 6371
  lat <- loc1[, 2]
  lon <- loc1[, 1]
  coslat1 <- cos((lat * pi)/180)
  sinlat1 <- sin((lat * pi)/180)
  coslon1 <- cos((lon * pi)/180)
  sinlon1 <- sin((lon * pi)/180)
  lat <- loc2[, 2]
  lon <- loc2[, 1]
  coslat2 <- cos((lat * pi)/180)
  sinlat2 <- sin((lat * pi)/180)
  coslon2 <- cos((lon * pi)/180)
  sinlon2 <- sin((lon * pi)/180)
  PP1 <- cbind(coslat1 * coslon1, coslat1 * sinlon1, sinlat1)
  PP2 <- cbind(coslat2 * coslon2, coslat2 * sinlon2, sinlat2)
  pp <- (PP1 %*% t(PP2))
  val<-R * acos(ifelse(pp > 1, 1, pp))
  return(val)
}



# transform the coordinates
# Montserrat Fuentes' projection on the plane Centered around the center of gravity

lonlat.to.planar<-  function(lon.lat, miles =FALSE) {
  x <- lon.lat[, 1]
  y <- lon.lat[, 2]
  mx <- mean(x)
  my <- mean(y)
  temp <- cbind(rep(mx, 2), range(y))
  sy <- rdistearth(temp)[2, 1]
  temp <- cbind(range(x), rep(my, 2))
  sx <- rdistearth(temp ,miles = miles)[2, 1]
  temp <- list(x = sx/(max(x) - min(x)), y = sy/(max(y) - min(y)))
  return(cbind((x - mx) * temp$x, (y - my) * temp$y))
}


coords<-lonlat.to.planar(coords)


alldata<-as.matrix(alldata)

# stations S20490 S20491 (lines 29,30) are identical
# (same coordinates, same observations)
# we remove station S20490

alldata<-alldata[,-29]
coords<-coords[-29,]

# Further removal of the two automatic stations 
# corresponding to lines 10 and 24
alldata<- alldata[,-c(10,24)]
coords<- coords[-c(10,24),]
row.names(alldata)<-NULL
